1. Introduction


2. Install R {corrplot} package

#install.packages("corrplot", quiet = TRUE)

3. Data for correlation analysis

library(dplyr, quietly = TRUE)

# Draw 10 rows of mtcars at random, without replacement, then display them.
# No seed is set in the code shown here, so the rows drawn differ per run.
res <- sample_n(tbl = mtcars, size = 10, replace = FALSE)
res
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## 2  33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## 3  21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
## 4  15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## 5  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 6  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 7  19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## 8  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 9  19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## 10 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
library(knitr, quietly = TRUE)

# Render the sampled rows as a captioned table.
kable(
  res,
  caption = 'The random sampling of mtcars data set'
)
The random sampling of mtcars data set
mpg cyl disp hp drat wt qsec vs am gear carb
13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3

4. Computing correlation matrix

# Pairwise correlation coefficients between all columns of mtcars.
# Evaluating M on its own line prints the full matrix.
M <- cor(mtcars)
M
##             mpg        cyl       disp         hp        drat         wt
## mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684  0.68117191 -0.8676594
## cyl  -0.8521620  1.0000000  0.9020329  0.8324475 -0.69993811  0.7824958
## disp -0.8475514  0.9020329  1.0000000  0.7909486 -0.71021393  0.8879799
## hp   -0.7761684  0.8324475  0.7909486  1.0000000 -0.44875912  0.6587479
## drat  0.6811719 -0.6999381 -0.7102139 -0.4487591  1.00000000 -0.7124406
## wt   -0.8676594  0.7824958  0.8879799  0.6587479 -0.71244065  1.0000000
## qsec  0.4186840 -0.5912421 -0.4336979 -0.7082234  0.09120476 -0.1747159
## vs    0.6640389 -0.8108118 -0.7104159 -0.7230967  0.44027846 -0.5549157
## am    0.5998324 -0.5226070 -0.5912270 -0.2432043  0.71271113 -0.6924953
## gear  0.4802848 -0.4926866 -0.5555692 -0.1257043  0.69961013 -0.5832870
## carb -0.5509251  0.5269883  0.3949769  0.7498125 -0.09078980  0.4276059
##             qsec         vs          am       gear        carb
## mpg   0.41868403  0.6640389  0.59983243  0.4802848 -0.55092507
## cyl  -0.59124207 -0.8108118 -0.52260705 -0.4926866  0.52698829
## disp -0.43369788 -0.7104159 -0.59122704 -0.5555692  0.39497686
## hp   -0.70822339 -0.7230967 -0.24320426 -0.1257043  0.74981247
## drat  0.09120476  0.4402785  0.71271113  0.6996101 -0.09078980
## wt   -0.17471588 -0.5549157 -0.69249526 -0.5832870  0.42760594
## qsec  1.00000000  0.7445354 -0.22986086 -0.2126822 -0.65624923
## vs    0.74453544  1.0000000  0.16834512  0.2060233 -0.56960714
## am   -0.22986086  0.1683451  1.00000000  0.7940588  0.05753435
## gear -0.21268223  0.2060233  0.79405876  1.0000000  0.27407284
## carb -0.65624923 -0.5696071  0.05753435  0.2740728  1.00000000
# Same matrix rounded to two decimal places for easier reading.
round(M, digits = 2)
##        mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
## mpg   1.00 -0.85 -0.85 -0.78  0.68 -0.87  0.42  0.66  0.60  0.48 -0.55
## cyl  -0.85  1.00  0.90  0.83 -0.70  0.78 -0.59 -0.81 -0.52 -0.49  0.53
## disp -0.85  0.90  1.00  0.79 -0.71  0.89 -0.43 -0.71 -0.59 -0.56  0.39
## hp   -0.78  0.83  0.79  1.00 -0.45  0.66 -0.71 -0.72 -0.24 -0.13  0.75
## drat  0.68 -0.70 -0.71 -0.45  1.00 -0.71  0.09  0.44  0.71  0.70 -0.09
## wt   -0.87  0.78  0.89  0.66 -0.71  1.00 -0.17 -0.55 -0.69 -0.58  0.43
## qsec  0.42 -0.59 -0.43 -0.71  0.09 -0.17  1.00  0.74 -0.23 -0.21 -0.66
## vs    0.66 -0.81 -0.71 -0.72  0.44 -0.55  0.74  1.00  0.17  0.21 -0.57
## am    0.60 -0.52 -0.59 -0.24  0.71 -0.69 -0.23  0.17  1.00  0.79  0.06
## gear  0.48 -0.49 -0.56 -0.13  0.70 -0.58 -0.21  0.21  0.79  1.00  0.27
## carb -0.55  0.53  0.39  0.75 -0.09  0.43 -0.66 -0.57  0.06  0.27  1.00

5. Correlogram: Visualizing the correlation matrix

library(corrplot, quietly = TRUE)

# Draw the correlation matrix as a correlogram using the "circle" method.
corrplot(M, method = "circle")


6. Visualization methods

library(corrplot, quietly = TRUE)

# Draw the same correlation matrix once per visualization method, in the
# order: circle, square, ellipse, shade, color, pie, number.
invisible(lapply(
  c("circle", "square", "ellipse", "shade", "color", "pie", "number"),
  function(glyph) corrplot(M, method = glyph)
))


7. Types of correlogram layout

# Show a single triangle of the matrix: first the upper one, then the lower.
corrplot(M, type = "upper")
corrplot(M, type = "lower")


8. Reordering the correlation matrix

# Upper-triangle correlogram with the variables reordered by "hclust".
corrplot(M, type = "upper", order = "hclust")

# Same plot drawn with a 20-step red / white / blue color ramp.
col <- colorRampPalette(colors = c("red", "white", "blue"))(20)
corrplot(M, type = "upper", order = "hclust", col = col)

# Black-and-white glyphs on a light blue background.
corrplot(
  M,
  type = "upper",
  order = "hclust",
  col = c("black", "white"),
  bg = "lightblue"
)


9. Changing the color of the correlogram

library(RColorBrewer, quietly = TRUE)

# brewer.pal{RColorBrewer} provides the ColorBrewer palettes (color schemes
# designed mainly for thematic maps). Three 8-color palettes are tried in
# turn on the same reordered upper-triangle correlogram.
corrplot(M, type = "upper", order = "hclust",
         col = brewer.pal(n = 8, name = "RdBu"))

corrplot(M, type = "upper", order = "hclust",
         col = brewer.pal(n = 8, name = "RdYlBu"))

corrplot(M, type = "upper", order = "hclust",
         col = brewer.pal(n = 8, name = "PuOr"))


10. Changing the color and the rotation of text labels

# Text labels drawn in blue and rotated (tl.srt = 45).
corrplot(
  M,
  type = "upper",
  order = "hclust",
  tl.col = "blue",
  tl.srt = 45
)


11. Combining correlogram with the significance test

# Pairwise correlation-test p-values for the columns of a data set.
#
# mat : a matrix of data (or anything as.matrix() can convert), with one
#       variable per column
# ... : further arguments to pass to the native R cor.test{stats} function
#
# Returns an n x n matrix (n = number of columns of mat) whose [i, j] entry
# is the p-value of cor.test() applied to columns i and j. The diagonal is
# set to 0 and the row/column names are copied from colnames(mat).
cor.mtest <- function(mat, ...) {
    mat <- as.matrix(mat)
    n <- ncol(mat)
    p.mat <- matrix(NA, n, n)
    diag(p.mat) <- 0

    # Visit each unordered pair of columns once and mirror the result.
    # seq_len() keeps the loops empty when there are fewer than two columns;
    # 1:(n - 1) would count down to 0 and index a column that does not exist.
    for (i in seq_len(max(n - 1L, 0L))) {
        for (j in seq.int(i + 1L, n)) {
            tmp <- cor.test(mat[, i], mat[, j], ...)
            p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
        }
    }

    colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
    p.mat
} # end function

# Matrix of p-values from the pairwise correlation tests on mtcars.
p.mat <- cor.mtest(mtcars)

# Preview the first rows of the first five columns.
head(p.mat[, seq_len(5)])
##                     mpg                  cyl                 disp
## mpg  0.0000000000000000 0.000000000611268714 0.000000000938032654
## cyl  0.0000000006112687 0.000000000000000000 0.000000000001802838
## disp 0.0000000009380327 0.000000000001802838 0.000000000000000000
## hp   0.0000001787835254 0.000000003477860669 0.000000071426786557
## drat 0.0000177623992875 0.000008244636190317 0.000005282021688157
## wt   0.0000000001293959 0.000000121756701354 0.000000000012223195
##                     hp           drat
## mpg  0.000000178783525 0.000017762399
## cyl  0.000000003477861 0.000008244636
## disp 0.000000071426787 0.000005282022
## hp   0.000000000000000 0.009988771894
## drat 0.009988771893526 0.000000000000
## wt   0.000041458274411 0.000004784260
# Mark the coefficients that are not significant at the 0.01 level.
corrplot(
  M,
  type = "upper",
  order = "hclust",
  p.mat = p.mat,
  sig.level = 0.01
)

# Same plot, but leave the non-significant coefficients blank.
corrplot(
  M,
  type = "upper",
  order = "hclust",
  p.mat = p.mat,
  sig.level = 0.01,
  insig = "blank"
)


12. Customize the correlogram

# Palette generator interpolating through the five given colors.
col <- colorRampPalette(
  c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
)

corrplot(
  M,
  method = "color",
  col = col(200),
  type = "upper",
  order = "hclust",
  addCoef.col = "black",  # print the correlation coefficients in black
  tl.col = "red",         # text label color ...
  tl.srt = 45,            # ... and rotation
  p.mat = p.mat,          # combine with the significance test:
  sig.level = 0.01,       # coefficients not significant at 0.01 ...
  insig = "blank",        # ... are left blank
  diag = FALSE            # hide the coefficients on the principal diagonal
)


13. Conclusions